In [6]:
import altair as alt
import pandas as pd
import numpy as np
# Switch Altair to the "vegafusion" data transformer for every chart built below.
# NOTE(review): presumably needed because the table loaded in this notebook (~32k rows)
# is larger than Altair's default row limit — confirm.
alt.data_transformers.enable("vegafusion")
Out[6]:
DataTransformerRegistry.enable('vegafusion')
In [7]:
# Read the source table; the path is resolved relative to the notebook's working directory.
# Tip: df["age"].unique() lists the raw age labels present in the data.
df = pd.read_csv(filepath_or_buffer="../data1.csv")
df
Out[7]:
| Area Code | location | Unit | measure | sex | age | cause | metric | year | mortality_value | upper_CI | lower_CI | pesticide_tonnes | % of GDP | mortality_class | region_type | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 5500 | Oceania | kg/cap | Deaths | Female | 25+ years | Sepsis and Inf. | Number | 2018 | 0.115895 | 0.175355 | 0.074152 | 2.06 | NaN | low end | Non-Europe |
| 1 | 5501 | Oceania | kg/cap | Deaths | Female | 25+ years | Sepsis and Inf. | Number | 2018 | 0.115895 | 0.175355 | 0.074152 | 2.84 | NaN | low end | Non-Europe |
| 2 | 5502 | Oceania | kg/cap | Deaths | Female | 25+ years | Sepsis and Inf. | Number | 2018 | 0.115895 | 0.175355 | 0.074152 | 0.17 | NaN | low end | Non-Europe |
| 3 | 5504 | Oceania | kg/cap | Deaths | Female | 25+ years | Sepsis and Inf. | Number | 2018 | 0.115895 | 0.175355 | 0.074152 | 0.98 | NaN | low end | Non-Europe |
| 4 | 5500 | Oceania | kg/cap | Deaths | Female | <20 years | Sepsis and Inf. | Number | 2018 | 0.004698 | 0.007462 | 0.002894 | 2.06 | NaN | low end | Non-Europe |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 32331 | 5404 | Western Europe | kg/cap | Deaths | Female | 25+ years | Gonococcal Inf. | Number | 2023 | 8.607440 | 10.822334 | 6.696607 | 0.63 | NaN | low end | Non-Europe |
| 32332 | 5404 | Western Europe | kg/cap | Deaths | Male | <20 years | Gonococcal Inf. | Number | 2023 | 0.029280 | 0.039035 | 0.021389 | 0.63 | NaN | low end | Non-Europe |
| 32333 | 5404 | Western Europe | kg/cap | Deaths | Female | <20 years | Gonococcal Inf. | Number | 2023 | 0.080631 | 0.099697 | 0.066684 | 0.63 | NaN | low end | Non-Europe |
| 32334 | 5404 | Western Europe | kg/cap | Deaths | Male | 55+ years | Gonococcal Inf. | Number | 2023 | 0.883928 | 1.233798 | 0.642922 | 0.63 | NaN | low end | Non-Europe |
| 32335 | 5404 | Western Europe | kg/cap | Deaths | Female | 55+ years | Gonococcal Inf. | Number | 2023 | 7.413664 | 9.356024 | 5.630986 | 0.63 | NaN | low end | Non-Europe |
32336 rows × 16 columns
In [8]:
# Collapse the raw age labels into three coarse groups.
# "All ages" is deliberately mapped to None so those rows end up with a null age_group.
age_map = {
    raw_label: group
    for group, raw_labels in (
        ("Children", ("<1 year", "<5 years", "<20 years")),
        ("Adults", ("25+ years",)),
        ("Older Adults", ("55+ years", "70+ years")),
        (None, ("All ages",)),
    )
    for raw_label in raw_labels
}
# Labels missing from age_map also come out null.
df["age_group"] = df["age"].map(age_map)
In [9]:
# log10 of mortality_value; exact zeros are blanked to NaN first so they do not turn into -inf.
df["mortality_log"] = np.log10(
    df["mortality_value"].mask(df["mortality_value"].eq(0))
)
In [10]:
# Working copy holding only the columns the charts below consume.
master = df[[
    "year", "region_type", "sex", "age_group", "cause", "mortality_value"
]].copy()

# Input for the density chart: rows labelled exactly "Male" or "Female".
data_sex = master[master["sex"].isin(["Male", "Female"])]

# Rows whose age label has no bin (e.g. "All ages", which age_map sends to None) carry a
# null age_group. groupby would drop them silently; dropping them here makes the exclusion
# explicit. The old `age_group != "All ages"` filter could never match, because that label
# does not survive the mapping.
master_binned = master.dropna(subset=["age_group"])

# Mean mortality per year / age group / sex / region: input for the faceted line chart.
# NOTE(review): sex is not restricted to Male/Female here, unlike data_sex — confirm that
# any other sex labels are meant to reach the line chart.
data_age = (
    master_binned
    .groupby(["year", "age_group", "sex", "region_type"], as_index=False)
    .agg({"mortality_value": "mean"})
)

# Mean mortality per age group / cause / region: input for the bubble chart.
data_agg = (
    master_binned
    .groupby(["age_group", "cause", "region_type"], as_index=False)
    .agg({"mortality_value": "mean"})
)
In [11]:
# Interactive controls used by the dashboard charts.
# Dropdown bound to a parameter holding the chosen region_type value (starts on "Europe").
region_dropdown = alt.binding_select(
    name="Region: ",
    options=["Europe", "Non-Europe"],
)
region_param = alt.param(name="region_param", value="Europe", bind=region_dropdown)

# Point selection keyed on the "cause" field.
cause_select = alt.selection_point(fields=["cause"])
# Line chart: mortality_value by year, coloured by sex, one facet per age group,
# restricted to the region chosen in the dropdown.
line = (
    alt.Chart(data_age)
    .mark_line(point=True)
    .encode(
        x=alt.X("year:O", title="Year"),
        y=alt.Y("mortality_value:Q", title="Mortality Value"),
        color=alt.Color(
            "sex:N",
            scale=alt.Scale(domain=["Female", "Male"], range=["#ff4fa3", "#4a90e2"]),
        ),
        facet=alt.Facet("age_group:N", columns=3),
    )
    # The region parameter is registered on this chart; the other charts only reference it.
    .add_params(region_param)
    .transform_filter(alt.datum.region_type == region_param)
    .properties(width=220, height=180, title="Mortality Trends by Age Group")
    # Each facet gets its own y scale.
    .resolve_scale(y="independent")
)
# Bubble chart: one circle per (cause, age group) for the chosen region; circle size encodes
# mortality_value. Clicking a circle drives cause_select.
bubble = (
    alt.Chart(data_agg)
    .add_params(cause_select)
    .transform_filter(alt.datum.region_type == region_param)
    .mark_circle(opacity=0.8, stroke="black", strokeWidth=1)
    .encode(
        x=alt.X("cause:N", title="Cause", axis=alt.Axis(labelAngle=-45)),
        y=alt.Y("age_group:N", title="Age Group"),
        size=alt.Size(
            "mortality_value:Q",
            title="Mortality",
            scale=alt.Scale(range=[100, 1000]),
        ),
        # Selected causes are drawn in steelblue, all others in light gray.
        color=alt.condition(
            cause_select,
            alt.value("steelblue"),
            alt.value("lightgray"),
        ),
        tooltip=[
            "cause:N",
            "age_group:N",
            alt.Tooltip("mortality_value:Q", format=".2f"),
        ],
    )
    .properties(title="Cause × Age Group (Click to Filter)", width=400, height=300)
)
# Density chart: distribution of mortality_value per sex for the chosen region, restricted
# to the causes picked in the bubble chart.
density = (
    alt.Chart(data_sex)
    # Both conditions are combined into a single predicate with `&`. Passing them as two
    # positional arguments is only accepted by newer Altair releases; older versions of
    # transform_filter take exactly one filter and raise a TypeError.
    # Rows with mortality_value >= 20000 are excluded from the density estimate.
    .transform_filter(
        (alt.datum.region_type == region_param)
        & (alt.datum.mortality_value < 20000)
    )
    .transform_filter(cause_select)
    # Density is estimated per sex; the output keeps the field name "mortality_value" for
    # the sample points and adds "density" for the estimate.
    .transform_density(
        "mortality_value",
        groupby=["sex"],
        as_=["mortality_value", "density"]
    )
    .mark_area(opacity=0.5)
    .encode(
        x=alt.X("mortality_value:Q", title="Mortality Value"),
        y=alt.Y("density:Q", title="Density"),
        color=alt.Color(
            "sex:N",
            scale=alt.Scale(
                domain=["Female", "Male"],
                range=["#ff4fa3", "#4a90e2"]
            ),
            title="Sex"
        )
    )
    .properties(width=400, height=300, title="Mortality Distribution by Sex")
)
# Dashboard layout: the faceted line chart on top; density and bubble side by side below.
bottom_row = alt.hconcat(density, bubble)
dashboard = alt.vconcat(line, bottom_row)
dashboard
Out[11]: